##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.6.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
The TCGA MAF summary file
# Load the TCGA MAF summary table and derive the identifiers that are later
# used to match mutation data to the splicing data.
maf_file <- "/media/theron/My_Passport/TCGA_junctions/maf_summary.txt"
mc3_maf <- read.table(maf_file, header = TRUE)

# Sample-level barcode with its last character removed
# (presumably the vial letter -- TODO confirm against the barcode layout).
mc3_maf$Tumor_Sample_ID <- vapply(
  TCGAbarcode(mc3_maf$Tumor_Sample_Barcode, sample = TRUE),
  function(val) {
    substr(val, 1, nchar(val) - 1)
  },
  character(1)
)

# Full barcodes become the row names; the per-cohort loops parse the sample
# type out of these row names.
rownames(mc3_maf) <- mc3_maf$Tumor_Sample_Barcode
mc3_maf$participant_ID <- TCGAbarcode(mc3_maf$Tumor_Sample_Barcode, participant = TRUE)
# Junction RangedSummarizedExperiment for one cohort (CHOL); only its column
# metadata and the metadata column names are extracted here.
junc_rse_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/CHOL/juncrse.rds"
junc_rse <- readRDS(file = junc_rse_file)
junc_metadata <- as.data.frame(junc_rse@colData@listData)
junc_rse_cols <- names(junc_metadata)

# One cohort directory per row; the directory's base name is the cohort label.
tumor_data_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/filenames.txt"
tumor_data <- read.table(file = tumor_data_file)
cancers <- basename(tumor_data[["V1"]])
# TMB<-list()

# Cohort-level summary table, filled in by the per-cohort loop.
cluster_metrics_tum <- data.frame(cancers = cancers)
# Per-cohort analysis of the annotated splice clusters
# (`*_splice_dat_clusters_filt_ann.rds`).
#
# For each cohort directory listed in `tumor_data` this:
#   1. restricts the MAF summary to the cohort's participants and to tumor
#      samples, and computes TMB as log10(total + 1);
#   2. renames the splice-data columns to sample IDs via the genotype table;
#   3. stores per-sample mean/median splice values alongside TMB;
#   4. writes cohort-level summaries into `cluster_metrics_tum`;
#   5. prints a heatmap and three TMB scatter plots.
# After the loop, `splice_mut_per_sample_all` holds the per-sample rows of all
# cohorts stacked together.

# Collected per cohort and bound once after the loop instead of growing a data
# frame with rbind() on every iteration.
per_sample_tables <- vector("list", nrow(tumor_data))

# seq_len() yields an empty sequence for an empty table (seq(0) would give 1, 0).
for (i in seq_len(nrow(tumor_data))) {
  print(sprintf("%d out of %d", i, nrow(tumor_data)))
  tumor_dir <- tumor_data[i, ]
  cancer <- basename(tumor_dir)
  print(cancer)

  # Cohort metadata. NOTE(review): column 4 is assumed to hold the TCGA
  # barcode and the 10th-from-last column the base count -- confirm against
  # the metadata file layout.
  tumor_meta_file <- sprintf("%s/%s_metadata.txt", tumor_dir, cancer)
  tumor_meta <- read.table(tumor_meta_file, quote = "", sep = "\t")
  tumor_meta$participant_ID <- TCGAbarcode(tumor_meta[, 4], participant = TRUE)
  tumor_meta$nbases <- tumor_meta[, ncol(tumor_meta) - 9]

  # Keep only complete MAF rows for participants of this cohort.
  mc3_maf_small <- subset(mc3_maf, participant_ID %in% tumor_meta$participant_ID)
  mc3_maf_small <- mc3_maf_small[complete.cases(mc3_maf_small), ]

  # Classify each barcode by the two-digit code at the start of its 4th
  # dash-separated field: <= 9 -> "T", 10-19 -> "N", otherwise "C".
  mc3_maf_small$type <- vapply(rownames(mc3_maf_small), function(barcode) {
    sample_code <- as.numeric(substr(strsplit(barcode, "-")[[1]][4], 1, 2))
    if (sample_code <= 9) {
      "T"
    } else if (sample_code <= 19) {
      "N"
    } else {
      "C"
    }
  }, character(1))
  mc3_maf_small$TMB <- log10(mc3_maf_small$total + 1)
  mc3_maf_small <- mc3_maf_small %>% dplyr::filter(type == "T")

  tumor_geno_file <- sprintf("%s/%s_genotypes.txt", tumor_dir, cancer)
  tumor_geno <- read.table(tumor_geno_file, header = TRUE)
  splice_mut_file <- sprintf("%s/%s_splice_dat_clusters_filt_ann.rds", tumor_dir, cancer)
  splice_mut_data <- readRDS(splice_mut_file)

  # Undo the name mangling on the splice-data columns (a leading "X" and "."
  # in place of "-"), then map external IDs to sample IDs. The pattern is
  # anchored so that only a leading "X" is stripped; match() returns the first
  # hit, or NA when an ID is unknown.
  external_ids <- str_replace(colnames(splice_mut_data), "^X", "")
  external_ids <- str_replace_all(external_ids, "[.]", "-")
  colnames(splice_mut_data) <-
    tumor_geno$sample_id[match(external_ids, tumor_geno$external_id)]

  # Align both tables on the shared samples, in MAF order. drop = FALSE keeps
  # the two-dimensional shape when only one sample is shared.
  mc3_maf_small <- mc3_maf_small %>%
    dplyr::filter(Tumor_Sample_ID %in% colnames(splice_mut_data))
  splice_mut_data <- splice_mut_data[, mc3_maf_small$Tumor_Sample_ID, drop = FALSE]

  # Per-sample summary: mean and median over all rows of the splice data.
  splice_mut_per_sample <- data.frame(sample = colnames(splice_mut_data))
  splice_mut_per_sample$splice_mut_av <- apply(splice_mut_data, 2, mean)
  splice_mut_per_sample$splice_mut_med <- apply(splice_mut_data, 2, median)
  splice_mut_per_sample$TMB <- mc3_maf_small$TMB
  splice_mut_per_sample$cancer <- cancer

  # Cohort-level summaries; the median-of-medians ignores samples whose
  # median is zero.
  is_cohort <- cluster_metrics_tum$cancers == cancer
  nonzero_med <- splice_mut_per_sample$splice_mut_med > 0
  cluster_metrics_tum[is_cohort, "splice_imm_med"] <-
    median(splice_mut_per_sample$splice_mut_med[nonzero_med])
  cluster_metrics_tum[is_cohort, "splice_imm_av"] <-
    mean(splice_mut_per_sample$splice_mut_av)
  cluster_metrics_tum[is_cohort, "TMB"] <- median(splice_mut_per_sample$TMB)

  # Store the unfiltered table; it is filtered further down for the last plot.
  per_sample_tables[[i]] <- splice_mut_per_sample

  splice_mut_data_mat <- as.matrix(log10(splice_mut_data + 1))
  colnames(splice_mut_data_mat) <- colnames(splice_mut_data)
  print(Heatmap(splice_mut_data_mat,
    top_annotation = HeatmapAnnotation(TMB = anno_barplot(mc3_maf_small$TMB)),
    show_row_names = FALSE,
    show_column_names = FALSE,
    cluster_rows = TRUE,
    cluster_columns = TRUE
  ))

  # The cohort label is used directly as the title; passing it through
  # sprintf() would treat it as a format string.
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_av, y = TMB)) +
    geom_point() +
    stat_cor(method = "spearman") +
    geom_smooth(method = "lm") +
    labs(title = cancer))
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_med, y = TMB)) +
    geom_point() +
    stat_cor(method = "spearman") +
    geom_smooth(method = "lm") +
    labs(title = cancer))

  # Same median plot restricted to samples with a non-zero median.
  splice_mut_per_sample <- splice_mut_per_sample %>% dplyr::filter(splice_mut_med > 0)
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_med, y = TMB)) +
    geom_point() +
    stat_cor(method = "spearman") +
    geom_smooth(method = "lm") +
    labs(title = cancer))
}

# Stack the per-cohort tables into one per-sample table.
splice_mut_per_sample_all <- do.call(rbind, per_sample_tables)
## [1] "1 out of 14"
## [1] "BLCA"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "2 out of 14"
## [1] "BRCA"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "3 out of 14"
## [1] "CHOL"
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "4 out of 14"
## [1] "COAD"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "5 out of 14"
## [1] "HNSC"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "6 out of 14"
## [1] "KICH"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "7 out of 14"
## [1] "KIRP"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "8 out of 14"
## [1] "LIHC"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "9 out of 14"
## [1] "LUAD"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "10 out of 14"
## [1] "LUSC"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "11 out of 14"
## [1] "PRAD"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "12 out of 14"
## [1] "READ"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "13 out of 14"
## [1] "THCA"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "14 out of 14"
## [1] "UCEC"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# Cohort-level comparison: median TMB against the log10(x + 1) of each
# cohort's splice summary, one text label per cohort, annotated with the
# Spearman correlation.
ggplot(
  cluster_metrics_tum,
  aes(x = log10(splice_imm_med + 1), y = TMB, label = cancers)
) +
  geom_text() +
  stat_cor(label.x.npc = "center", label.y.npc = "top", method = "spearman") +
  labs(x = "splicing antigenicity median")

ggplot(
  cluster_metrics_tum,
  aes(x = log10(splice_imm_av + 1), y = TMB, label = cancers)
) +
  geom_text() +
  stat_cor(label.x.npc = "center", label.y.npc = "top", method = "spearman") +
  labs(x = "splicing antigenicity average")
# Pooled per-sample view across all cohorts: TMB against the mean and the
# median splice value, with Spearman correlation and a linear fit.
# The plot title must be passed to labs() by name; an unnamed string is not
# mapped to any label, so the intended title never appeared.
ggplot(splice_mut_per_sample_all, aes(x = splice_mut_av, y = TMB)) +
  geom_point() +
  stat_cor(
    method = "spearman",
    label.x.npc = "center",
    label.y.npc = "top"
  ) +
  geom_smooth(method = "lm") +
  labs(title = "All Samples")
## `geom_smooth()` using formula 'y ~ x'
ggplot(splice_mut_per_sample_all, aes(x = splice_mut_med, y = TMB)) +
  geom_point() +
  stat_cor(
    method = "spearman",
    label.x.npc = "center",
    label.y.npc = "top"
  ) +
  geom_smooth(method = "lm") +
  labs(title = "All Samples")
## `geom_smooth()` using formula 'y ~ x'
# Same median plot after dropping samples whose median is zero.
splice_mut_per_sample_all_filt <- splice_mut_per_sample_all %>%
  dplyr::filter(splice_mut_med > 0)
ggplot(splice_mut_per_sample_all_filt, aes(x = splice_mut_med, y = TMB)) +
  geom_point() +
  stat_cor(
    method = "spearman",
    label.x.npc = "center",
    label.y.npc = "top"
  ) +
  geom_smooth(method = "lm") +
  labs(title = "All Samples, No Zero Median samples")
## `geom_smooth()` using formula 'y ~ x'
# Re-read the inputs and reset the cohort-level summary table before the
# second pass over the cohorts.
junc_rse_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/CHOL/juncrse.rds"
junc_rse <- readRDS(file = junc_rse_file)
junc_metadata <- as.data.frame(junc_rse@colData@listData)
junc_rse_cols <- names(junc_metadata)

# One cohort directory per row; the directory's base name is the cohort label.
tumor_data_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/filenames.txt"
tumor_data <- read.table(file = tumor_data_file)
cancers <- basename(tumor_data[["V1"]])
# TMB<-list()

# Fresh, empty summary table: one row per cohort.
cluster_metrics_tum <- data.frame(cancers = cancers)
# Per-cohort analysis of the filtered splice clusters
# (`*_splice_dat_clusters_filt.rds`).
#
# For each cohort directory listed in `tumor_data` this:
#   1. restricts the MAF summary to the cohort's participants and to tumor
#      samples, and computes TMB as log10(total + 1);
#   2. renames the splice-data columns to sample IDs via the genotype table;
#   3. stores per-sample mean/median splice values alongside TMB;
#   4. writes cohort-level summaries into `cluster_metrics_tum`;
#   5. prints a heatmap and three TMB scatter plots.
# After the loop, `splice_mut_per_sample_all` holds the per-sample rows of all
# cohorts stacked together.

# Collected per cohort and bound once after the loop instead of growing a data
# frame with rbind() on every iteration.
per_sample_tables <- vector("list", nrow(tumor_data))

# seq_len() yields an empty sequence for an empty table (seq(0) would give 1, 0).
for (i in seq_len(nrow(tumor_data))) {
  print(sprintf("%d out of %d", i, nrow(tumor_data)))
  tumor_dir <- tumor_data[i, ]
  cancer <- basename(tumor_dir)
  print(cancer)

  # Cohort metadata. NOTE(review): column 4 is assumed to hold the TCGA
  # barcode and the 10th-from-last column the base count -- confirm against
  # the metadata file layout.
  tumor_meta_file <- sprintf("%s/%s_metadata.txt", tumor_dir, cancer)
  tumor_meta <- read.table(tumor_meta_file, quote = "", sep = "\t")
  tumor_meta$participant_ID <- TCGAbarcode(tumor_meta[, 4], participant = TRUE)
  tumor_meta$nbases <- tumor_meta[, ncol(tumor_meta) - 9]

  # Keep only complete MAF rows for participants of this cohort.
  mc3_maf_small <- subset(mc3_maf, participant_ID %in% tumor_meta$participant_ID)
  mc3_maf_small <- mc3_maf_small[complete.cases(mc3_maf_small), ]

  # Classify each barcode by the two-digit code at the start of its 4th
  # dash-separated field: <= 9 -> "T", 10-19 -> "N", otherwise "C".
  mc3_maf_small$type <- vapply(rownames(mc3_maf_small), function(barcode) {
    sample_code <- as.numeric(substr(strsplit(barcode, "-")[[1]][4], 1, 2))
    if (sample_code <= 9) {
      "T"
    } else if (sample_code <= 19) {
      "N"
    } else {
      "C"
    }
  }, character(1))
  mc3_maf_small$TMB <- log10(mc3_maf_small$total + 1)
  mc3_maf_small <- mc3_maf_small %>% dplyr::filter(type == "T")

  tumor_geno_file <- sprintf("%s/%s_genotypes.txt", tumor_dir, cancer)
  tumor_geno <- read.table(tumor_geno_file, header = TRUE)
  splice_mut_file <- sprintf("%s/%s_splice_dat_clusters_filt.rds", tumor_dir, cancer)
  splice_mut_data <- readRDS(splice_mut_file)

  # Undo the name mangling on the splice-data columns (a leading "X" and "."
  # in place of "-"), then map external IDs to sample IDs. The pattern is
  # anchored so that only a leading "X" is stripped; match() returns the first
  # hit, or NA when an ID is unknown.
  external_ids <- str_replace(colnames(splice_mut_data), "^X", "")
  external_ids <- str_replace_all(external_ids, "[.]", "-")
  colnames(splice_mut_data) <-
    tumor_geno$sample_id[match(external_ids, tumor_geno$external_id)]

  # Align both tables on the shared samples, in MAF order. drop = FALSE keeps
  # the two-dimensional shape when only one sample is shared.
  mc3_maf_small <- mc3_maf_small %>%
    dplyr::filter(Tumor_Sample_ID %in% colnames(splice_mut_data))
  splice_mut_data <- splice_mut_data[, mc3_maf_small$Tumor_Sample_ID, drop = FALSE]

  # Per-sample summary: mean and median over all rows of the splice data.
  splice_mut_per_sample <- data.frame(sample = colnames(splice_mut_data))
  splice_mut_per_sample$splice_mut_av <- apply(splice_mut_data, 2, mean)
  splice_mut_per_sample$splice_mut_med <- apply(splice_mut_data, 2, median)
  splice_mut_per_sample$TMB <- mc3_maf_small$TMB
  splice_mut_per_sample$cancer <- cancer

  # Cohort-level summaries; the median-of-medians ignores samples whose
  # median is zero.
  is_cohort <- cluster_metrics_tum$cancers == cancer
  nonzero_med <- splice_mut_per_sample$splice_mut_med > 0
  cluster_metrics_tum[is_cohort, "splice_imm_med"] <-
    median(splice_mut_per_sample$splice_mut_med[nonzero_med])
  cluster_metrics_tum[is_cohort, "splice_imm_av"] <-
    mean(splice_mut_per_sample$splice_mut_av)
  cluster_metrics_tum[is_cohort, "TMB"] <- median(splice_mut_per_sample$TMB)

  # Store the unfiltered table; it is filtered further down for the last plot.
  per_sample_tables[[i]] <- splice_mut_per_sample

  splice_mut_data_mat <- as.matrix(log10(splice_mut_data + 1))
  colnames(splice_mut_data_mat) <- colnames(splice_mut_data)
  print(Heatmap(splice_mut_data_mat,
    top_annotation = HeatmapAnnotation(TMB = anno_barplot(mc3_maf_small$TMB)),
    show_row_names = FALSE,
    show_column_names = FALSE,
    cluster_rows = TRUE,
    cluster_columns = TRUE
  ))

  # The cohort label is used directly as the title; passing it through
  # sprintf() would treat it as a format string.
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_av, y = TMB)) +
    geom_point() +
    stat_cor(method = "spearman") +
    geom_smooth(method = "lm") +
    labs(title = cancer))
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_med, y = TMB)) +
    geom_point() +
    stat_cor(method = "spearman") +
    geom_smooth(method = "lm") +
    labs(title = cancer))

  # Same median plot restricted to samples with a non-zero median.
  splice_mut_per_sample <- splice_mut_per_sample %>% dplyr::filter(splice_mut_med > 0)
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_med, y = TMB)) +
    geom_point() +
    stat_cor(method = "spearman") +
    geom_smooth(method = "lm") +
    labs(title = cancer))
}

# Stack the per-cohort tables into one per-sample table.
splice_mut_per_sample_all <- do.call(rbind, per_sample_tables)
## [1] "1 out of 14"
## [1] "BLCA"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "2 out of 14"
## [1] "BRCA"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "3 out of 14"
## [1] "CHOL"
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "4 out of 14"
## [1] "COAD"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "5 out of 14"
## [1] "HNSC"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "6 out of 14"
## [1] "KICH"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "7 out of 14"
## [1] "KIRP"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "8 out of 14"
## [1] "LIHC"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "9 out of 14"
## [1] "LUAD"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "10 out of 14"
## [1] "LUSC"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "11 out of 14"
## [1] "PRAD"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "12 out of 14"
## [1] "READ"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "13 out of 14"
## [1] "THCA"
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## [1] "14 out of 14"
## [1] "UCEC"
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
# Cohort-level comparison on the untransformed scale: median TMB against each
# cohort's splice summary, one text label per cohort, annotated with the
# Spearman correlation.
ggplot(
  cluster_metrics_tum,
  aes(x = splice_imm_med, y = TMB, label = cancers)
) +
  geom_text() +
  stat_cor(label.x.npc = "center", label.y.npc = "top", method = "spearman") +
  labs(x = "splicing antigenicity median")

ggplot(
  cluster_metrics_tum,
  aes(x = splice_imm_av, y = TMB, label = cancers)
) +
  geom_text() +
  stat_cor(label.x.npc = "center", label.y.npc = "top", method = "spearman") +
  labs(x = "splicing antigenicity average")
# Pooled per-sample view across all cohorts: TMB against the mean and the
# median splice value, with Spearman correlation and a linear fit.
# The plot title must be passed to labs() by name; an unnamed string is not
# mapped to any label, so the intended title (including the sample count)
# never appeared.
ggplot(splice_mut_per_sample_all, aes(x = splice_mut_av, y = TMB)) +
  geom_point() +
  stat_cor(
    method = "spearman",
    label.x.npc = "center",
    label.y.npc = "top"
  ) +
  geom_smooth(method = "lm") +
  labs(title = "All Samples")
## `geom_smooth()` using formula 'y ~ x'
ggplot(splice_mut_per_sample_all, aes(x = splice_mut_med, y = TMB)) +
  geom_point() +
  stat_cor(
    method = "spearman",
    label.x.npc = "center",
    label.y.npc = "top"
  ) +
  geom_smooth(method = "lm") +
  labs(title = sprintf("All Samples: %d", nrow(splice_mut_per_sample_all)))
## `geom_smooth()` using formula 'y ~ x'
# Same median plot after dropping samples whose median is zero.
splice_mut_per_sample_all_filt <- splice_mut_per_sample_all %>%
  dplyr::filter(splice_mut_med > 0)
ggplot(splice_mut_per_sample_all_filt, aes(x = splice_mut_med, y = TMB)) +
  geom_point() +
  stat_cor(
    method = "spearman",
    label.x.npc = "center",
    label.y.npc = "top"
  ) +
  geom_smooth(method = "lm") +
  labs(title = sprintf("No Zero Median samples: %d", nrow(splice_mut_per_sample_all_filt)))
## `geom_smooth()` using formula 'y ~ x'
Looking at cibersort data per sample
# Attach the per-sample splice summaries, TMB and cohort label to the
# CIBERSORT relative-fraction table.
cibersort_file <- "/media/theron/My_Passport/TCGA_junctions/ext_dat/TCGA.Kallisto.fullIDs.cibersort.relative.tsv"
cibersort_data <- read.table(cibersort_file, header = TRUE)

# Sample IDs in the CIBERSORT table use "." where barcodes use "-". After
# restoring the dashes, the sample-level barcode is taken and its final
# character removed, mirroring how `Tumor_Sample_ID` is built for the MAF.
cibersort_data$sample <- vapply(
  TCGAbarcode(str_replace_all(cibersort_data$SampleID, "[.]", "-"), sample = TRUE),
  function(sample) {
    substr(sample, 1, nchar(sample) - 1)
  },
  character(1)
)

# Setting row names also asserts that sample IDs are unique (it errors on
# duplicates), which the lookup below relies on.
rownames(splice_mut_per_sample_all) <- splice_mut_per_sample_all$sample
cibersort_data_filt <- cibersort_data %>%
  dplyr::filter(sample %in% splice_mut_per_sample_all$sample)

# One vectorised lookup replaces a per-row which() scan followed by an
# unlist()/matrix() round trip; it keeps column types and also works when no
# sample matches.
sample_rows <- match(cibersort_data_filt$sample, splice_mut_per_sample_all$sample)
cibersort_append <- splice_mut_per_sample_all[
  sample_rows,
  c("splice_mut_av", "splice_mut_med", "TMB", "cancer"),
  drop = FALSE
]
rownames(cibersort_append) <- NULL

# NOTE: as in the original code, `cancers` is overwritten here with the
# per-sample cohort labels (it previously held one entry per cohort).
cancers <- cibersort_append$cancer

cibersort_data_filt <- cbind(cibersort_data_filt, cibersort_append)
Naive B-cells
# Scatter plot of one CIBERSORT cell-type fraction against one per-sample
# predictor, annotated with the Spearman correlation and a linear fit.
#
# data:  data frame holding both columns.
# x_var: name of the predictor column (string).
# y_var: name of the cell-type fraction column (string).
# Returns the ggplot object without printing it.
plot_immune_cor <- function(data, x_var, y_var) {
  # The column names arrive as strings; converting them to symbols and
  # unquoting keeps the axis labels equal to the bare column names.
  ggplot(data, aes(x = !!as.name(x_var), y = !!as.name(y_var))) +
    geom_point() +
    stat_cor(
      method = "spearman",
      label.x.npc = "center",
      label.y.npc = "top"
    ) +
    geom_smooth(method = "lm")
}

# Cell types in the order they are reported.
immune_cell_types <- c(
  "B.cells.naive",
  "B.cells.memory",
  "Plasma.cells",
  "T.cells.CD8",
  "T.cells.CD4.naive",
  "T.cells.CD4.memory.resting",
  "T.cells.CD4.memory.activated"
)
# Each cell type is plotted against TMB, then the mean, then the median
# splice value.
immune_predictors <- c("TMB", "splice_mut_av", "splice_mut_med")

for (cell_type in immune_cell_types) {
  for (predictor in immune_predictors) {
    # Plots are not auto-printed inside a loop, so print explicitly.
    print(plot_immune_cor(cibersort_data_filt, predictor, cell_type))
  }
}
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=T.cells.follicular.helper))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=T.cells.follicular.helper))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=T.cells.follicular.helper))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=T.cells.regulatory..Tregs.))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=T.cells.regulatory..Tregs.))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=T.cells.regulatory..Tregs.))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=T.cells.gamma.delta))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=T.cells.gamma.delta))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=T.cells.gamma.delta))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=NK.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=NK.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=NK.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=NK.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=NK.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=NK.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Monocytes))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Monocytes))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Monocytes))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Macrophages.M0))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Macrophages.M0))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Macrophages.M0))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Macrophages.M1))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Macrophages.M1))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Macrophages.M1))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Macrophages.M2))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Macrophages.M2))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Macrophages.M2))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Dendritic.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Dendritic.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Dendritic.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Dendritic.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Dendritic.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Dendritic.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Mast.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Mast.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Mast.cells.resting))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Mast.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Mast.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Mast.cells.activated))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Eosinophils))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Eosinophils))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Eosinophils))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=TMB,y=Neutrophils))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_av,y=Neutrophils))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
ggplot(cibersort_data_filt,aes(x=splice_mut_med,y=Neutrophils))+
geom_point()+
stat_cor(method = "spearman",
label.x.npc = "center",
label.y.npc = "top")+
geom_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
# Load the splicing-factor gene list, upper-case the symbols and write them
# out as a plain one-column text file (no header, no quoting).
splicing_factor_genes <- read_excel("/media/theron/My_Passport/TCGA_junctions/ext_dat/splicing_factor_genes1.xlsx")
splicing_factor_genes <- toupper(splicing_factor_genes$Gene)
write.table(data.frame(splicing_factor_genes),
            file = "/media/theron/My_Passport/TCGA_junctions/ext_dat/splicing_factor_genes.txt",
            sep = "\t",
            quote = FALSE,
            col.names = FALSE,
            row.names = FALSE)

# Per-sample mutation summary of the splicing-factor MAF.
splice_maf <- read.maf("/media/theron/My_Passport/TCGA_junctions/splice_factor.maf")
splice_maf_samp <- getSampleSummary(splice_maf)

# Derive sample-level and participant-level identifiers from the barcode.
tumor_barcodes <- as.character(splice_maf_samp$Tumor_Sample_Barcode)
tumor_sample_ids <- TCGAbarcode(tumor_barcodes, sample = TRUE)
# Drop the last character of the sample identifier (presumably the vial
# letter -- TODO confirm); substr() is vectorised, so no per-element loop.
splice_maf_samp$Tumor_Sample_ID <- substr(tumor_sample_ids, 1, nchar(tumor_sample_ids) - 1)
splice_maf_samp$participant_ID <- TCGAbarcode(tumor_barcodes, participant = TRUE)

rownames(splice_maf_samp) <- splice_maf_samp$Tumor_Sample_Barcode
splice_maf_samp <- data.frame(splice_maf_samp)

# `mc3_maf` is the table used downstream; it is indexed by the full barcode.
mc3_maf <- splice_maf_samp
rownames(mc3_maf) <- mc3_maf$Tumor_Sample_Barcode
# Per-cancer comparison of tumour mutation burden (TMB) with the per-sample
# splicing scores. For every directory listed in filenames.txt this section
#   * restricts the MAF sample summary to tumour samples of that cancer,
#   * computes the per-sample mean / median of the splicing score matrix,
#   * stores per-cancer medians / means in `cluster_metrics_tum`,
#   * draws two TMB-vs-splicing scatter plots,
# and finally stacks all per-sample tables into `splice_mut_per_sample_all`.
junc_rse_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/CHOL/juncrse.rds"
junc_rse <- readRDS(junc_rse_file)
junc_metadata <- as.data.frame(junc_rse@colData@listData)
junc_rse_cols <- colnames(junc_metadata)
tumor_data_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/filenames.txt"
tumor_data <- read.table(tumor_data_file)
cancers <- basename(tumor_data$V1)
cluster_metrics_tum <- data.frame(cancers)

# One per-sample table per cancer; bound together once after the loop instead
# of growing a data frame with rbind() on every iteration.
per_sample_tables <- vector("list", nrow(tumor_data))

for (i in seq_len(nrow(tumor_data))) {
  print(sprintf("%d out of %d", i, nrow(tumor_data)))
  tumor_dir <- tumor_data[i, ]
  cancer <- basename(tumor_dir)
  print(cancer)

  tumor_meta_file <- sprintf("%s/%s_metadata.txt", tumor_dir, cancer)
  tumor_meta <- read.table(tumor_meta_file, quote = "", sep = "\t")
  tumor_meta$participant_ID <- TCGAbarcode(tumor_meta[, 4], participant = TRUE)
  # Evaluated after participant_ID was appended, exactly as before, so the
  # offset counts from the widened table.
  tumor_meta$nbases <- tumor_meta[, ncol(tumor_meta) - 9]

  # Samples of this cancer's participants, complete rows only.
  mc3_maf_small <- subset(mc3_maf, participant_ID %in% tumor_meta$participant_ID)
  mc3_maf_small <- mc3_maf_small[complete.cases(mc3_maf_small), ]

  # The first two characters of the 4th barcode field are read as a number:
  # <= 9 is labelled "T", 10-19 "N", anything else "C".
  type_code <- as.numeric(substr(
    vapply(strsplit(rownames(mc3_maf_small), "-"),
           function(parts) parts[4],
           character(1)),
    1, 2
  ))
  # A barcode without a numeric 4th field is an error, as in the per-element
  # version this replaces.
  stopifnot(!anyNA(type_code))
  mc3_maf_small$type <- ifelse(type_code <= 9, "T",
                               ifelse(type_code <= 19, "N", "C"))
  mc3_maf_small$TMB <- log10(mc3_maf_small$total + 1)
  mc3_maf_small <- mc3_maf_small %>% dplyr::filter(type == "T")

  tumor_geno_file <- sprintf("%s/%s_genotypes.txt", tumor_dir, cancer)
  tumor_geno <- read.table(tumor_geno_file, header = TRUE)
  splice_mut_file <- sprintf("%s/%s_splice_dat_clusters_filt.rds", tumor_dir, cancer)
  splice_mut_data <- readRDS(splice_mut_file)

  # Translate the column names back to external ids (strip a *leading* "X",
  # turn "." into "-") and map them to sample ids. Anchoring the "X" keeps
  # ids that merely contain an X from being corrupted; match() returns the
  # first hit, like which(...)[1] did.
  external_ids <- str_replace(colnames(splice_mut_data), "^X", "")
  external_ids <- str_replace_all(external_ids, "[.]", "-")
  colnames(splice_mut_data) <- as.character(
    tumor_geno$sample_id[match(external_ids, tumor_geno$external_id)]
  )

  # Keep the samples present in both tables, in MAF order. drop = FALSE keeps
  # the table two-dimensional when a single sample is left.
  mc3_maf_small <- mc3_maf_small %>%
    dplyr::filter(Tumor_Sample_ID %in% colnames(splice_mut_data))
  splice_mut_data <- splice_mut_data[, mc3_maf_small$Tumor_Sample_ID, drop = FALSE]

  # `cancer` is recorded per sample because the MAF annotation step later in
  # the file looks the cancer type up in the stacked table.
  splice_mut_per_sample <- data.frame(
    sample = colnames(splice_mut_data),
    splice_mut_av = apply(splice_mut_data, 2, mean),
    splice_mut_med = apply(splice_mut_data, 2, median),
    TMB = mc3_maf_small$TMB,
    cancer = rep(cancer, ncol(splice_mut_data)),
    row.names = NULL,
    stringsAsFactors = FALSE
  )

  # Per-cancer summaries; the median uses samples with a non-zero median only.
  this_cancer <- cluster_metrics_tum$cancers == cancer
  nonzero_medians <-
    splice_mut_per_sample$splice_mut_med[splice_mut_per_sample$splice_mut_med > 0]
  cluster_metrics_tum[this_cancer, "splice_imm_med"] <- median(nonzero_medians)
  cluster_metrics_tum[this_cancer, "splice_imm_av"] <- mean(splice_mut_per_sample$splice_mut_av)
  cluster_metrics_tum[this_cancer, "TMB"] <- median(splice_mut_per_sample$TMB)

  per_sample_tables[[i]] <- splice_mut_per_sample

  splice_mut_data_mat <- as.matrix(log10(splice_mut_data + 1))
  colnames(splice_mut_data_mat) <- colnames(splice_mut_data)
  # print(Heatmap(splice_mut_data_mat,
  #               top_annotation = HeatmapAnnotation(TMB=anno_barplot(mc3_maf_small$TMB)),
  #               show_row_names=F,
  #               show_column_names = F,
  #               cluster_rows=T,
  #               cluster_columns=T))

  # The title is passed as plain text; routing it through sprintf() would
  # treat any "%" in the name as a format directive.
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_av, y = TMB)) +
          geom_point() +
          stat_cor(method = "spearman") +
          geom_smooth(method = "lm") +
          labs(title = cancer))

  # The median plot only shows samples with a non-zero median.
  splice_mut_per_sample <- splice_mut_per_sample %>%
    dplyr::filter(splice_mut_med > 0)
  print(ggplot(splice_mut_per_sample, aes(x = splice_mut_med, y = TMB)) +
          geom_point() +
          stat_cor(method = "spearman") +
          geom_smooth(method = "lm") +
          labs(title = cancer))
}

# Unfiltered per-sample tables of all cancers, stacked in processing order.
splice_mut_per_sample_all <- do.call(rbind, per_sample_tables)
# Pan-cancer summaries of TMB against the splicing scores.

# One label per cancer type: per-cancer median score vs per-cancer TMB.
ggplot(cluster_metrics_tum,
       aes(x = log10(splice_imm_med + 1), y = TMB, label = cancers)) +
  geom_text() +
  stat_cor(method = "spearman",
           label.x.npc = "center",
           label.y.npc = "top") +
  xlab("splicing antigenicity median")

# Same, using the per-cancer average score.
ggplot(cluster_metrics_tum,
       aes(x = log10(splice_imm_av + 1), y = TMB, label = cancers)) +
  geom_text() +
  stat_cor(method = "spearman",
           label.x.npc = "center",
           label.y.npc = "top") +
  xlab("splicing antigenicity average")

# Every sample of every cancer. The title has to be passed as `title =`;
# an unnamed string given to labs() does not set any label.
ggplot(splice_mut_per_sample_all, aes(x = splice_mut_av, y = TMB)) +
  geom_point() +
  stat_cor(method = "spearman",
           label.x.npc = "center",
           label.y.npc = "top") +
  geom_smooth(method = "lm") +
  labs(title = "All Samples")

ggplot(splice_mut_per_sample_all, aes(x = splice_mut_med, y = TMB)) +
  geom_point() +
  stat_cor(method = "spearman",
           label.x.npc = "center",
           label.y.npc = "top") +
  geom_smooth(method = "lm") +
  labs(title = "All Samples")

# From here on `splice_mut_per_sample_all` only holds samples whose median
# score is above zero; later sections see the filtered table.
splice_mut_per_sample_all <- splice_mut_per_sample_all %>%
  dplyr::filter(splice_mut_med > 0)

ggplot(splice_mut_per_sample_all, aes(x = splice_mut_med, y = TMB)) +
  geom_point() +
  stat_cor(method = "spearman",
           label.x.npc = "center",
           label.y.npc = "top") +
  geom_smooth(method = "lm") +
  labs(title = "All Samples, No Zero Median samples")
# Annotate every splicing-factor mutation in the MAF with the splicing scores
# and cancer type of the sample it was found in.
splice_maf_data <- splice_maf@data
splice_maf_data <- splice_maf_data %>%
  dplyr::filter(Hugo_Symbol %in% splicing_factor_genes)
splice_maf_data <- splice_maf_data[, c("Hugo_Symbol", "Variant_Classification", "Tumor_Sample_Barcode")]

# Sample identifier with its last character removed, plus the participant id.
maf_barcodes <- as.character(splice_maf_data$Tumor_Sample_Barcode)
maf_sample_ids <- TCGAbarcode(maf_barcodes, sample = TRUE)
splice_maf_data$Tumor_Sample_ID <- substr(maf_sample_ids, 1, nchar(maf_sample_ids) - 1)
splice_maf_data$participant_ID <- TCGAbarcode(maf_barcodes, participant = TRUE)
splice_maf_data$cancer <- NA

splice_maf_data_fill <- splice_maf_data
splice_maf_data_fill$cancer <- as.character(splice_maf_data_fill$cancer)

# The cancer type is looked up in the per-sample table; fail with a clear
# message if that table was built without it.
if (!"cancer" %in% colnames(splice_mut_per_sample_all)) {
  stop("`splice_mut_per_sample_all` has no `cancer` column; ",
       "record the cancer type when building the per-sample table.",
       call. = FALSE)
}

# Row of the per-sample table for each mutation (NA when the sample is not in
# it). match() takes the first hit, so a sample listed more than once no
# longer breaks the lookup.
sample_idx <- match(splice_maf_data_fill$Tumor_Sample_ID,
                    splice_mut_per_sample_all$sample,
                    incomparables = NA)
has_sample <- !is.na(sample_idx)

# Unmatched samples get scores of 0 and the cancer label "None".
splice_ant_av <- numeric(length(sample_idx))
splice_ant_med <- numeric(length(sample_idx))
cancer_label <- rep("None", length(sample_idx))
splice_ant_av[has_sample] <-
  as.numeric(splice_mut_per_sample_all$splice_mut_av[sample_idx[has_sample]])
splice_ant_med[has_sample] <-
  as.numeric(splice_mut_per_sample_all$splice_mut_med[sample_idx[has_sample]])
cancer_label[has_sample] <-
  as.character(splice_mut_per_sample_all$cancer[sample_idx[has_sample]])

splice_maf_data_fill$splice_ant_av <- splice_ant_av
splice_maf_data_fill$splice_ant_med <- splice_ant_med
splice_maf_data_fill$cancer <- cancer_label
# Build a sample x gene matrix counting the splicing-factor mutations of each
# sample, keep the samples whose counts vary across genes and sort both the
# matrix and the sample annotation by cancer type.
junc_rse_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/CHOL/juncrse.rds"
junc_rse <- readRDS(junc_rse_file)
junc_metadata <- as.data.frame(junc_rse@colData@listData)
junc_rse_cols <- colnames(junc_metadata)
tumor_data_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/filenames.txt"
tumor_data <- read.table(tumor_data_file)
cancers <- basename(tumor_data$V1)
nogos <- c("ESCA","MESO","PAAD","KIRC","GBM")

# Only mutations from samples that were assigned a cancer type.
splice_maf_data_fill <- splice_maf_data_fill %>% dplyr::filter(cancer != "None")
sample_cancer_dat <- unique(
  splice_maf_data_fill[, c("Tumor_Sample_ID", "cancer", "splice_ant_av", "splice_ant_med")]
)

# For each sample: number of MAF rows per splicing-factor gene.
gene_counts_by_sample <- lapply(sample_cancer_dat$Tumor_Sample_ID, function(ID) {
  # which() keeps NA identifiers from selecting rows.
  sample_genes <-
    splice_maf_data_fill$Hugo_Symbol[which(splice_maf_data_fill$Tumor_Sample_ID == ID)]
  vapply(splicing_factor_genes,
         function(gene) sum(sample_genes == gene, na.rm = TRUE),
         numeric(1))
})
sample_splice_factor_dat <- data.frame(
  matrix(unlist(gene_counts_by_sample),
         nrow = nrow(sample_cancer_dat),
         byrow = TRUE)
)
rownames(sample_splice_factor_dat) <- sample_cancer_dat$Tumor_Sample_ID
colnames(sample_splice_factor_dat) <- splicing_factor_genes

# Samples whose counts are not identical across all genes (row sd > 0).
# Computed once and applied to both tables so they stay row-aligned.
varying_rows <- which(apply(sample_splice_factor_dat, 1, sd) > 0)
sample_splice_factor_dat_muts <- sample_splice_factor_dat[varying_rows, ]
sample_cancer_dat_muts <- sample_cancer_dat[varying_rows, ]

# Group the rows by cancer type.
cancer_order <- order(sample_cancer_dat_muts$cancer)
sample_splice_factor_dat_muts <- sample_splice_factor_dat_muts[cancer_order, ]
sample_cancer_dat_muts <- sample_cancer_dat_muts[cancer_order, ]
# Heatmaps of the per-sample splicing-factor mutation counts, annotated with
# the cancer type (left) and a bar plot of a splicing score (right).

# Build one heatmap.
#
# values:       matrix-like table to draw (samples in rows).
# splice_score: numeric vector for the right-hand bar plot, one per row.
# cluster_rows: TRUE to cluster the rows, FALSE to keep the given row order.
#
# Row and column names are hidden; columns are always clustered. The cancer
# annotation is read from `sample_cancer_dat_muts`.
splice_factor_heatmap <- function(values, splice_score, cluster_rows) {
  Heatmap(values,
          right_annotation = rowAnnotation(spliceant = anno_barplot(splice_score)),
          left_annotation = rowAnnotation(cancer = sample_cancer_dat_muts$cancer),
          show_row_names = FALSE,
          show_column_names = FALSE,
          cluster_rows = cluster_rows,
          cluster_columns = TRUE)
}

# Two views of the counts: log2(count + 1) and per-sample z-scores.
splice_factor_log2 <- log2(sample_splice_factor_dat_muts + 1)
splice_factor_scaled <- t(scale(t(sample_splice_factor_dat_muts)))

# Rows clustered.
splice_factor_heatmap(splice_factor_log2,
                      sample_cancer_dat_muts$splice_ant_av,
                      cluster_rows = TRUE)
splice_factor_heatmap(splice_factor_scaled,
                      sample_cancer_dat_muts$splice_ant_med,
                      cluster_rows = TRUE)

# Rows kept in their current order (sorted by cancer type).
splice_factor_heatmap(splice_factor_log2,
                      sample_cancer_dat_muts$splice_ant_av,
                      cluster_rows = FALSE)
splice_factor_heatmap(splice_factor_scaled,
                      sample_cancer_dat_muts$splice_ant_med,
                      cluster_rows = FALSE)
# Total number of splicing-factor mutations per sample, compared with the
# sample's splicing score.
#
# NOTE(review): these plots used to map a column called `splice_ant`, which
# `sample_cancer_dat_muts` does not have (its score columns are
# `splice_ant_av` and `splice_ant_med`), so every plot failed with "object
# 'splice_ant' not found". They now use `splice_ant_av`; switch to
# `splice_ant_med` if the median was intended.
sample_cancer_dat_muts$sum <- rowSums(sample_splice_factor_dat_muts)

# One scatter plot per cancer type.
for (cancer_type in unique(sample_cancer_dat_muts$cancer)) {
  specific_cancer <- sample_cancer_dat_muts %>%
    dplyr::filter(cancer == cancer_type)
  print(ggplot(specific_cancer,
               aes(x = log10(splice_ant_av + 1), y = log2(sum + 1))) +
          geom_point() +
          labs(title = cancer_type))
}

# Distribution of mutation totals and of the score across cancer types.
ggplot(sample_cancer_dat_muts, aes(x = cancer, y = log2(sum + 1))) +
  geom_boxplot()
ggplot(sample_cancer_dat_muts, aes(x = cancer, y = log10(splice_ant_av + 1))) +
  geom_boxplot()

# All samples together, with Spearman correlation and a linear fit.
ggplot(sample_cancer_dat_muts,
       aes(x = log10(splice_ant_av + 1), y = log10(sum + 1))) +
  geom_point() +
  stat_cor(method = "spearman") +
  geom_smooth(method = "lm")
# Extract the numerator from "numerator/denominator" strings.
#
# vals: character vector (anything else is coerced with as.character()) whose
#       entries look like "3/10".
#
# Returns an unnamed numeric vector of the same length as `vals` holding the
# part before the first "/". Entries that are NA or empty give NA. Each entry
# is split on its own, so empty input returns numeric(0) and an entry without
# a "/" cannot shift the values of its neighbours.
split_num <- function(vals) {
  parts <- strsplit(as.character(vals), "/", fixed = TRUE)
  as.numeric(vapply(parts, function(p) p[1], character(1)))
}
# Extract the denominator from "numerator/denominator" strings.
#
# vals: character vector (anything else is coerced with as.character()) whose
#       entries look like "3/10".
#
# Returns an unnamed numeric vector of the same length as `vals` holding the
# part after the first "/". Entries without a second part (NA, empty, or no
# "/") give NA. Each entry is split on its own, so empty input returns
# numeric(0) and a malformed entry cannot shift the values of its neighbours.
split_dom <- function(vals) {
  parts <- strsplit(as.character(vals), "/", fixed = TRUE)
  as.numeric(vapply(parts, function(p) p[2], character(1)))
}
# Placeholder: takes `vals`, does nothing with it and returns NULL.
eval_clusters <- function(vals) {
  NULL
}
# splicemutr data needed per cancer type
# per-line counts needed per cancer type
# only keep those proteins that are longer than 9 kmers
# Per-cancer aggregation of the per-sample scores by cluster.
# For every cancer directory not listed in `nogos` this section
#   * reads the splicemutr table, genotypes, metadata, junction counts and
#     the per-sample summary files,
#   * turns the "a/b" junction counts into ratios (psi) for tumour samples,
#   * multiplies the summary values by psi, row by row,
#   * filters rows (peptide present, not annotated-and-changed, deltapsi > 0),
#   * averages the weighted values within each cluster,
# and saves `<cancer>_splice_dat_clusters.rds` and `<cancer>_clusters.rds`.
tumor_data_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/filenames.txt"
tumor_data <- read.table(tumor_data_file)
cancers <- basename(tumor_data$V1)
nogos <- c("ESCA","MESO","PAAD","KIRC","GBM")

# is.nan() method for data frames: element-wise logical matrix. Defined once
# here instead of on every loop iteration.
is.nan.data.frame <- function(x) {
  do.call(cbind, lapply(x, is.nan))
}

for (i in seq_len(nrow(tumor_data))) {
  print(sprintf("%d out of %d", i, nrow(tumor_data)))
  tumor_dir <- tumor_data[i, ]
  cancer <- basename(tumor_dir)
  print(cancer)
  if (cancer %in% nogos) {
    next
  }

  # ---- inputs ------------------------------------------------------------
  splice_dat_file <- sprintf("%s/%s_splicemutr_dat.txt", tumor_dir, cancer)
  splice_dat <- read.table(splice_dat_file, header = TRUE, sep = "\t")
  tumor_geno_file <- sprintf("%s/%s_genotypes.txt", tumor_dir, cancer)
  tumor_geno <- read.table(tumor_geno_file, header = TRUE)
  # tumor_geno <- tumor_geno %>% dplyr::filter(type == "T")

  # summaries.txt lists one file per line; the files actually read are the
  # "persamp_line" counterparts of the listed "kmers_summary" files.
  summary_file <- sprintf("%s/summaries.txt", tumor_dir)
  summaries <- as.character(read.table(summary_file)$V1)
  summaries <- str_replace(summaries, "kmers_summary", "persamp_line")

  meta_file <- sprintf("%s/%s_metadata.rds", tumor_dir, cancer)
  meta_dat <- readRDS(meta_file)
  rownames(meta_dat) <- meta_dat$external_id

  # Junction counts: keep the genotyped samples, in genotype-table order, and
  # rename the sample columns to TCGA barcodes.
  psi_file <- sprintf("%s/data_perind.counts", tumor_dir)
  psi_dat <- read.table(psi_file, header = TRUE, check.names = FALSE)
  psi_dat <- psi_dat[, c("chrom", as.character(tumor_geno$external_id))]
  sample_cols <- seq(2, ncol(psi_dat))
  sample_names <- colnames(psi_dat)[sample_cols]
  sample_names <- meta_dat[sample_names, "tcga.tcga_barcode"]
  colnames(psi_dat)[sample_cols] <- sample_names

  # ---- per-sample summaries ------------------------------------------------
  # Every file but the last has its final two columns dropped before the
  # files are bound side by side, so those two columns appear once, at the
  # end; they are named "row" and "cluster" below.
  n_summaries <- length(summaries)
  summary_parts <- lapply(seq_len(n_summaries), function(idx) {
    part <- read.table(summaries[idx], header = FALSE, sep = "\t")
    if (idx < n_summaries) {
      part <- part[, seq_len(ncol(part) - 2), drop = FALSE]
    }
    part
  })
  summaries_combined <- do.call(cbind, summary_parts)

  sample_types <- sprintf("%s_%s", tumor_geno$sample_id, tumor_geno$type)
  sample_types <- c(sample_types, "row", "cluster")
  colnames(summaries_combined) <- sample_types

  # Tumour samples are picked with one rule for both the summaries and psi,
  # so the two tables always have the same sample columns in the same order.
  tumor_cols <- which(tumor_geno$type == "T")
  summaries_combined <- summaries_combined[
    , c(tumor_cols, length(sample_types) - 1, length(sample_types))
  ]

  # ---- psi -----------------------------------------------------------------
  # Counts are "numerator/denominator" strings; 0/0 gives NaN, which is set
  # to 0.
  num <- data.frame(apply(psi_dat[, sample_cols, drop = FALSE], 2, split_num))
  denom <- data.frame(apply(psi_dat[, sample_cols, drop = FALSE], 2, split_dom))
  psi <- num / denom
  psi[is.nan(psi)] <- 0
  psi$chrom <- psi_dat$chrom
  colnames(psi)[seq(1, ncol(psi) - 1)] <- colnames(psi_dat)[sample_cols]
  psi <- psi[, c(tumor_cols, ncol(psi))]

  # `row` + 1 indexes the matching rows of psi and of the splicemutr table
  # (presumably `row` is zero-based -- TODO confirm).
  junction_rows <- summaries_combined$row + 1
  psi_summary <- psi[junction_rows, ]
  summaries_combined_psi <-
    psi_summary[, seq_len(ncol(psi_summary) - 1), drop = FALSE] *
    summaries_combined[, seq_len(ncol(summaries_combined) - 2), drop = FALSE]
  splice_dat_specific <- splice_dat[junction_rows, ]

  # ---- row filters ---------------------------------------------------------
  # Each filter is applied to all three tables so they stay row-aligned.
  # 1. rows with a peptide
  rows_to_keep <- which(!is.na(splice_dat_specific$peptide))
  summaries_combined_psi <- summaries_combined_psi[rows_to_keep, ]
  summaries_combined_psi[is.na(summaries_combined_psi)] <- 0
  summaries_combined <- summaries_combined[rows_to_keep, ]
  splice_dat_specific <- splice_dat_specific[rows_to_keep, ]

  # 2. drop rows that are both "annotated" and "changed"; which() keeps rows
  #    with missing verdict/modified from turning into all-NA rows.
  rows_to_keep <- which(!(splice_dat_specific$verdict == "annotated" &
                            splice_dat_specific$modified == "changed"))
  summaries_combined_psi <- summaries_combined_psi[rows_to_keep, ]
  summaries_combined <- summaries_combined[rows_to_keep, ]
  splice_dat_specific <- splice_dat_specific[rows_to_keep, ]

  # 3. rows with positive deltapsi
  rows_to_keep <- which(splice_dat_specific$deltapsi > 0)
  summaries_combined_psi <- summaries_combined_psi[rows_to_keep, ]
  summaries_combined <- summaries_combined[rows_to_keep, ]
  splice_dat_specific <- splice_dat_specific[rows_to_keep, ]

  # ---- per-cluster aggregation ---------------------------------------------
  # One row per cluster: number of rows (Freq) and the genes, ":"-joined.
  clusters <- data.frame(table(splice_dat_specific$cluster))
  rownames(clusters) <- clusters$Var1
  clusters$Var1 <- as.character(clusters$Var1)
  clusters$genes <- vapply(clusters$Var1, function(clu) {
    cluster_genes <-
      splice_dat_specific$gene[which(splice_dat_specific$cluster == clu)]
    paste(unique(cluster_genes), collapse = ":")
  }, character(1))

  # Per cluster and sample: column sum of the weighted values divided by the
  # number of rows in the cluster.
  splice_dat_clusters <- data.frame(t(vapply(clusters$Var1, function(clu) {
    cluster_rows <- which(splice_dat_specific$cluster == clu)
    summaries_combined_small <- summaries_combined_psi[cluster_rows, ]
    apply(summaries_combined_small, 2, sum) / clusters[clu, "Freq"]
  }, numeric(ncol(summaries_combined_psi)))))

  saveRDS(splice_dat_clusters, file = sprintf("%s/%s_splice_dat_clusters.rds", tumor_dir, cancer))
  saveRDS(clusters, file = sprintf("%s/%s_clusters.rds", tumor_dir, cancer))

  # Release the large per-cancer objects before the next iteration.
  rm(clusters,
     splice_dat_clusters,
     splice_dat_specific,
     rows_to_keep,
     junction_rows,
     summaries_combined,
     summaries_combined_psi,
     summary_parts,
     n_summaries,
     psi_summary, psi,
     num, denom,
     tumor_cols,
     sample_types,
     sample_names,
     sample_cols,
     psi_dat,
     psi_file,
     meta_dat,
     meta_file,
     summaries,
     summary_file,
     tumor_geno,
     tumor_geno_file,
     splice_dat,
     splice_dat_file,
     cancer,
     tumor_dir)
}
# Gene x cancer heatmap of median_binders * ann_prop, built from the
# per-cancer `<cancer>_per_gene_data.rds` files (cancers in `nogos` skipped).
tumor_data_file <- "/media/theron/My_Passport/TCGA_junctions/TCGA_cancers/filenames.txt"
tumor_data <- read.table(tumor_data_file)
cancers <- basename(tumor_data$V1)
nogos <- c("ESCA","MESO","PAAD","KIRC","GBM")

# Read every per-gene table once and keep it by cancer name; the gene union
# and the matrices below are both built from this cache.
per_gene_by_cancer <- list()
for (i in seq_len(nrow(tumor_data))) {
  print(sprintf("%d out of %d", i, nrow(tumor_data)))
  tumor_dir <- tumor_data[i, ]
  cancer <- basename(tumor_dir)
  print(cancer)
  if (cancer %in% nogos) {
    next
  }
  per_gene_data <- readRDS(file = sprintf("%s/%s_per_gene_data.rds", tumor_dir, cancer))
  # Gene names are used as row indices below, so they must be character.
  per_gene_data$genes <- as.character(per_gene_data$genes)
  per_gene_by_cancer[[cancer]] <- per_gene_data
}

# Cancers that were actually loaded, and all genes seen in any of them
# (first-seen order).
cancers <- names(per_gene_by_cancer)
genes <- unique(unlist(lapply(per_gene_by_cancer, function(dat) dat$genes),
                       use.names = FALSE))

per_gene_data_tot_binders <- data.frame(genes, stringsAsFactors = FALSE)
per_gene_data_tot_prop <- data.frame(genes, stringsAsFactors = FALSE)
rownames(per_gene_data_tot_binders) <- per_gene_data_tot_binders$genes
rownames(per_gene_data_tot_prop) <- per_gene_data_tot_prop$genes

# One column per cancer; -1 marks a gene that is absent from that cancer.
for (cancer in cancers) {
  per_gene_data <- per_gene_by_cancer[[cancer]]
  per_gene_data_tot_binders[, cancer] <- -1
  per_gene_data_tot_prop[, cancer] <- -1
  per_gene_data_tot_binders[per_gene_data$genes, cancer] <- per_gene_data$median_binders
  per_gene_data_tot_prop[per_gene_data$genes, cancer] <- per_gene_data$ann_prop
}

# Row annotation: names containing "-" are labelled "Fusion", others "Single".
row_ann <- ifelse(str_detect(genes, "-"), "Fusion", "Single")
row_ann <- data.frame(row_ann)
rownames(row_ann) <- genes

# Drop the leading `genes` column. drop = FALSE keeps a data frame even when
# one (or no) cancer column is left.
per_gene_data_tot_binders <- per_gene_data_tot_binders[, -1, drop = FALSE]
per_gene_data_tot_prop <- per_gene_data_tot_prop[, -1, drop = FALSE]
per_gene_comp <- per_gene_data_tot_binders * per_gene_data_tot_prop

# Where a gene is absent, both tables hold -1 and the product is +1, which
# would plot as log10(2) and rank above genes with a true value of 0. Those
# cells are set to 0 so absent genes sit at the bottom of the colour scale.
# NOTE(review): switch to NA if absent genes should be shown as missing.
gene_absent <- as.matrix(per_gene_data_tot_binders) == -1 &
  as.matrix(per_gene_data_tot_prop) == -1
gene_absent[is.na(gene_absent)] <- FALSE
per_gene_comp[gene_absent] <- 0

Heatmap(log10(per_gene_comp + 1),
        right_annotation = rowAnnotation(df = row_ann),
        show_row_names = FALSE,
        show_column_names = TRUE,
        cluster_rows = TRUE,
        cluster_columns = TRUE)